#### Comparison of yield distribution and KDD effect graphs

## Setup environment

setwd("~/research/coffee-dataverse/src")

do.origspec <- T # For determining priors
source("load.R", encoding="iso-8859-1")

## Calculate graph data

allpdf <- data.frame()
for (output.suffix in c("-eq24-orig", "-eq24rational-orig")) {
    library(dplyr)
    ## Estimate naive and single-year-correction yields
    df2 <- df %>% group_by(Munic�pio) %>% arrange(year, .by_group=T) %>% summarize(total.produce=total.produce, total.harvest=total.harvest, bearing.syc=pmax(total.harvest, c(0, total.harvest[-length(total.harvest)])), edd1000=edd1000)

    df2$yield.naive <- df2$total.produce / df2$total.harvest
    df2$yield.syc <- df2$total.produce / df2$bearing.syc

    ## Collect Bayesian estimate yields
    sumstats <- read.csv(paste0("../data/sumstats", output.suffix, ".csv"))
    names(sumstats)[1] <- "fullparam"
    sumstats$param <- gsub("\\[\\d+\\]", "", gsub("_coeff\\[(\\d+)\\]", "_coeff\\1", sumstats$fullparam))

    ## Estimate two ways
    sumstats2 <- sumstats %>% filter(param == 'logbearing') %>% group_by(fullparam, param, region) %>%
        summarize(mean=sum(mean) / length(mean), se_mean=sum(se_mean) / length(se_mean))
    sumstats3 <- sumstats %>% filter(param == 'soft_harvest_part') %>% group_by(fullparam, param, region) %>%
        summarize(mean=sum(mean) / length(mean), se_mean=sum(se_mean) / length(se_mean))

    df2$bearing.bayes <- NA
    for (region in unique(sumstats2$region)) {
        rows.ss <- which(sumstats2$region == region & sumstats2$param == 'logbearing')
        rows.df <- which(df2$Munic�pio == region & !is.na(df2$edd1000))
        if (length(rows.ss) != length(rows.df))
            print(paste("Skip", region))
        if (length(rows.ss) == 0)
            next

        df2$bearing.bayes[rows.df] = exp(sumstats2$mean[rows.ss] + sumstats2$se_mean[rows.ss]^2 / 2)
    }

    df2$yield.bayes <- df2$total.produce / pmax(df2$bearing.bayes, df2$total.harvest)

    df2$harvest.part.bayes <- NA # harvest portion
    for (region in unique(sumstats3$region)) {
        rows.ss <- which(sumstats3$region == region & sumstats3$param == 'soft_harvest_part')
        rows.df <- which(df2$Munic�pio == region & !is.na(df2$edd1000))
        if (length(rows.ss) != length(rows.df))
            print(paste("Skip", region))
        if (length(rows.ss) == 0)
            next

        df2$harvest.part.bayes[rows.df] = sumstats3$mean[rows.ss]
    }

    df2$yield.bayes2 <- df2$total.produce / (df2$total.harvest / df2$harvest.part.bayes)

    ## Report geometric mean
    df2$yield.bayes.geo <- exp((log(df2$yield.bayes) + log(df2$yield.bayes2)) / 2)

    ## Collect all data for display
    pdf <- data.frame(yield=exp(seq(log(.01), log(4), length.out=400)))
    pdf$cdf.naive <- NA
    pdf$cdf.syc <- NA
    pdf$cdf.bayes <- NA
    for (ii in 1:nrow(pdf)) {
        pdf$cdf.naive[ii] <- mean(df2$yield.naive <= pdf$yield[ii], na.rm=T)
        pdf$cdf.syc[ii] <- mean(df2$yield.syc <= pdf$yield[ii], na.rm=T)
        pdf$cdf.bayes[ii] <- mean(df2$yield.bayes.geo <= pdf$yield[ii], na.rm=T)
    }

    ## Produce yield distribution graph
    library(ggplot2)
    library(reshape2)

    ggplot(melt(pdf, id.vars='yield'), aes(yield, value, colour=variable)) +
        geom_line() + scale_x_log10(expand=c(0, 0)) + theme_bw() +
        theme(legend.position="bottom") + ylab("Empirical distribution") + xlab("Yields (MT / Ha)") +
        scale_colour_discrete(name=NULL, labels=c("Quantity / Harvested", "Quantity / Single-year Bearing", "Quantity / Bayesian Bearing"))
    ggsave(paste0("../figures/sims/yielddist", output.suffix, ".pdf"), width=6.5, height=5)

    ## KDD Range figure

    library(rstan)

    df$price.delayed <- get.price.delayed(df)

    medprice <- 3.26 #round(median(df$price.delayed[df$year >= 1985], na.rm=T), 2)
    lowprice <- round(quantile(df$price.delayed[df$year >= 1985], .1, na.rm=T), 2)
    medyield <- median(df2$yield.bayes.geo[df$year >= 1985], na.rm=T)

    load(paste0("../data/combined-betagamma", output.suffix, ".RData"))

    ## Get names
    param <- get.fit.params(output.suffix)

    ## Simulate for each number of KDDs
    pdf <- data.frame()
    for (price in c(medprice, lowprice)) {
        for (kdd in 0:100) {
            minyield <- la$gamma[, 1, which(param == "cost_harvest")] / price
            yield <- exp(log(medyield) + la$gamma[, 1, which(param == "yield_coeff3")] * kdd / 1000)
            yielddelta <- yield * la$gamma[, 1, which(param == "yield_uncertainty")]

            harvest <- (yield + yielddelta - minyield) / (2*yielddelta)
            harvest[harvest < 0] <- 0
            harvest[harvest > 1] <- 1

            product <- ((yield + yielddelta)^2 - minyield^2) / (4*yielddelta)
            product[product > yield] <- yield[product > yield]
            obsyield <- product / harvest

            profit <- price * product - la$gamma[, 1, which(param == "cost_harvest")] * harvest

            pdf <- rbind(pdf, data.frame(kdd, price, outcome=c("Yield (MT / Ha)", "Yield (MT / Ha)", "Harvested Area (fraction)", "Profit (USD / Ha)"), istrue=c(T, F, T, T), mu=c(mean(yield), mean(obsyield), mean(harvest), mean(profit)), cilo=c(quantile(yield, .025), quantile(obsyield, .025), quantile(harvest, .025), quantile(profit, .025)), cihi=c(quantile(yield, .975), quantile(obsyield, .975), quantile(harvest, .975), quantile(profit, .975))))
        }
    }

    ## Produce graph

    pdf$outcome <- factor(pdf$outcome, levels=c("Yield (MT / Ha)", "Harvested Area (fraction)", "Profit (USD / Ha)"))
    pdf$fullprice <- paste(pdf$price, "($2010/kg)")

    library(ggplot2)

    ggplot(pdf, aes(x=kdd, y=mu, fill=fullprice, colour=fullprice, group=paste(fullprice, istrue))) +
        facet_grid(outcome ~ ., scales="free") +
        geom_line(data=subset(pdf, !istrue | outcome != "Yield (MT / Ha)"), aes(linetype=fullprice)) +
        geom_line(data=subset(pdf, istrue & outcome == "Yield (MT / Ha)"), aes(colour="Biophysical yield", fill="Biophysical yield", linetype="Biophysical yield")) +
        geom_ribbon(data=subset(pdf, !istrue | outcome != "Yield (MT / Ha)"),
                    aes(group=paste0(istrue, price), ymin=cilo, ymax=cihi), alpha=.5) +
        theme_bw() + scale_x_continuous(expand=c(0, 0)) +
        scale_linetype_manual(name="International\nPrice:", values=c("dashed", "solid", "twodash")) +
        xlab("Extreme degree-days") + ylab("") + scale_colour_discrete(name="International\nPrice:") + scale_fill_discrete(name="International\nPrice:")

    ggsave(paste0("../figures/sims/kddrange", output.suffix, ".pdf"), width=6.5, height=5)

    allpdf <- rbind(allpdf, cbind(pdf, expectations=ifelse(output.suffix == "-eq24-orig", "Adaptive", "Rational")))
}

## Produce combined graph

allpdf$expectations <- paste(allpdf$expectations, "Expectations")

ggplot(allpdf, aes(x=kdd, y=mu, fill=fullprice, colour=fullprice, group=paste(fullprice, istrue))) +
    facet_grid(outcome ~ expectations, scales="free") +
    geom_line(data=subset(allpdf, is.finite(mu) & (!istrue | outcome != "Yield (MT / Ha)")), aes(linetype=fullprice)) +
    geom_line(data=subset(allpdf, is.finite(mu) & (istrue & outcome == "Yield (MT / Ha)")), aes(colour="Biophysical yield", fill="Biophysical yield", linetype="Biophysical yield")) +
    geom_ribbon(data=subset(allpdf, is.finite(mu) & (!istrue | outcome != "Yield (MT / Ha)")),
                aes(group=paste0(istrue, price), ymin=cilo, ymax=cihi), alpha=.5) +
    theme_bw() + scale_x_continuous(expand=c(0, 0)) +
    scale_linetype_manual(name="International\nPrice:", values=c("dashed", "solid", "twodash")) +
    xlab("Extreme degree-days") + ylab("") + scale_colour_discrete(name="International\nPrice:") + scale_fill_discrete(name="International\nPrice:") +
    theme(panel.spacing.x=unit(1, "lines"))
ggsave(paste0("../figures/sims/kddrange.pdf"), width=6.5, height=5.5)
